In [ ]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
1. Familiar with Python
2. Completed Handbook 1/Part 4: Advanced Convolutional Neural Networks
1. Architecture Changes - Pre-stems
2. Dense connections across sublayers in DenseNet
3. Xception Redesigned Macro-Architecture for CNN
Let's create a pre-stem to handle an input size different than what the neural network was designed for.
We will use these approaches:
1. Calculate the difference in size between the expected input and the actual size of
the input (in our case we are assuming actual size less than expected size).
A. Expected = (230, 230, 3)
B. Actual = (224, 224, 3)
2. Pad the inputs to fit into the expected size.
You fill in the blanks (replace the ??), make sure it passes the Python interpreter, and then verify it's correctness with the summary output.
You will need to:
1. Set the padding of the image prior to the first convolution.
In [ ]:
from keras import layers, Input
# Not the input shape expected by the stem (which is (230, 230, 3)
inputs = Input(shape=(224, 224, 3))
# Add a pre-stem and pad (224, 224, 3) to (230, 230, 3)
# HINT: Since the pad is on both sides (left/right, top/bottom) you want to divide the
# difference by two (half goes to the left, half goes to the right, etc)
inputs = layers.ZeroPadding2D(??)(inputs)
# This stem's expected shape is (230, 230, 3)
x = layers.Conv2D(64, (7, 7), strides=(2,2))(inputs)
X = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
In [ ]:
# this will output: (230, 230, 3)
print("inputs", inputs.shape)
# this will output: (?, 112, 112, 64)
print("outputs", x.shape)
Let's create a DenseNet-121:
We will use these approaches:
1. Add a pre-stem step of padding by 1 pixel so a 230x230x3 input results in 7x7
feature maps at the global average (bottleneck) layer.
2. Use average pooling (subsamnpling) in transition blocks.
3. Accumulated feature maps through residual blocks by concatenting the input to the
output, and making that the new output.
4. Use compression to reduce feature map sizes between dense blocks.
You will need to:
1. Set the padding in the stem group.
2. Concatenate the input and output at each residual block.
3. Set the compression (reduction) of filters in the transition block.
4. Use average pooling in transition block.
In [ ]:
from keras import layers, Input, Model
def stem(inputs):
""" The Stem Convolution Group
inputs : input tensor
"""
# First large convolution for abstract features for input 230 x 230 and output
# 112 x 112
x = layers.Conv2D(64, (7, 7), strides=2)(inputs)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Add padding so when downsampling we fit shape 56 x 56
# Hint: we want to pad one pixel all around.
x = layers.ZeroPadding2D(padding=(??, ??)(x)
x = layers.MaxPooling2D((3, 3), strides=2)(x)
return x
def dense_block(x, nblocks, nb_filters):
""" Construct a Dense Block
x : input layer
nblocks : number of residual blocks in dense block
nb_filters: number of filters in convolution layer in residual block
"""
# Construct a group of residual blocks
for _ in range(nblocks):
x = residual_block(x, nb_filters)
return x
def residual_block(x, nb_filters):
""" Construct Residual Block
x : input layer
nb_filters: number of filters in convolution layer in residual block
"""
shortcut = x # remember input tensor into residual block
# Bottleneck convolution, expand filters by 4 (DenseNet-B)
x = layers.Conv2D(4 * nb_filters, (1, 1), strides=(1, 1))(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# 3 x 3 convolution with padding=same to preserve same shape of feature maps
x = layers.Conv2D(nb_filters, (3, 3), strides=(1, 1), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Concatenate the input (identity) with the output of the residual block
# Concatenation (vs. merging) provides Feature Reuse between layers
# HINT: Use a list which includes the remembered input and the output from the residual block - which becomes the new output
x = layers.concatenate([??])
return x
def trans_block(x, reduce_by):
""" Construct a Transition Block
x : input layer
reduce_by: percentage of reduction of feature maps
"""
# Reduce (compression) the number of feature maps (DenseNet-C)
# shape[n] returns a class object. We use int() to cast it into the dimension
# size
# HINT: the compression is a percentage (~0.5) that was passed as a parameter to this function
nb_filters = int( int(x.shape[3]) * ?? )
# Bottleneck convolution
x = layers.Conv2D(nb_filters, (1, 1), strides=(1, 1))(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Use mean value (average) instead of max value sampling when pooling
# reduce by 75%
# HINT: instead of Max Pooling (downsampling) we use Average Pooling (subsampling)
x = layers.??Pooling2D((2, 2), strides=(2, 2))(x)
return x
inputs = Input(shape=(230, 230, 3))
# Create the Stem Convolution Group
x = stem(inputs)
# number of residual blocks in each dense block
blocks = [6, 12, 24, 16]
# pop off the list the last dense block
last = blocks.pop()
# amount to reduce feature maps by (compression) during transition blocks
reduce_by = 0.5
# number of filters in a convolution block within a residual block
nb_filters = 32
# Create the dense blocks and interceding transition blocks
for nblocks in blocks:
x = dense_block(x, nblocks, nb_filters)
x = trans_block(x, reduce_by)
# Add the last dense block w/o a following transition block
x = dense_block(x, last, nb_filters)
# Classifier
# Global Average Pooling will flatten the 7x7 feature maps into 1D feature maps
x = layers.GlobalAveragePooling2D()(x)
# Fully connected output layer (classification)
outputs = x = layers.Dense(1000, activation='softmax')(x)
model = Model(inputs, outputs)
It should look like below:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) (None, 230, 230, 3) 0
__________________________________________________________________________________________________
conv2d_241 (Conv2D) (None, 112, 112, 64) 9472 input_3[0][0]
__________________________________________________________________________________________________
batch_normalization_241 (BatchN (None, 112, 112, 64) 256 conv2d_241[0][0]
__________________________________________________________________________________________________
re_lu_241 (ReLU) (None, 112, 112, 64) 0 batch_normalization_241[0][0]
__________________________________________________________________________________________________
zero_padding2d_2 (ZeroPadding2D (None, 114, 114, 64) 0 re_lu_241[0][0]
__________________________________________________________________________________________________
max_pooling2d_3 (MaxPooling2D) (None, 56, 56, 64) 0 zero_padding2d_2[0][0]
__________________________________________________________________________________________________
conv2d_242 (Conv2D) (None, 56, 56, 128) 8320 max_pooling2d_3[0][0]
__________________________________________________________________________________________________
batch_normalization_242 (BatchN (None, 56, 56, 128) 512 conv2d_242[0][0]
__________________________________________________________________________________________________
re_lu_242 (ReLU) (None, 56, 56, 128) 0 batch_normalization_242[0][0]
__________________________________________________________________________________________________
conv2d_243 (Conv2D) (None, 56, 56, 32) 36896 re_lu_242[0][0]
__________________________________________________________________________________________________
batch_normalization_243 (BatchN (None, 56, 56, 32) 128 conv2d_243[0][0]
__________________________________________________________________________________________________
re_lu_243 (ReLU) (None, 56, 56, 32) 0 batch_normalization_243[0][0]
__________________________________________________________________________________________________
concatenate_117 (Concatenate) (None, 56, 56, 96) 0 max_pooling2d_3[0][0]
re_lu_243[0][0]
__________________________________________________________________________________________________
conv2d_244 (Conv2D) (None, 56, 56, 128) 12416 concatenate_117[0][0]
__________________________________________________________________________________________________
batch_normalization_244 (BatchN (None, 56, 56, 128) 512 conv2d_244[0][0]
__________________________________________________________________________________________________
re_lu_244 (ReLU) (None, 56, 56, 128) 0 batch_normalization_244[0][0]
__________________________________________________________________________________________________
conv2d_245 (Conv2D) (None, 56, 56, 32) 36896 re_lu_244[0][0]
__________________________________________________________________________________________________
batch_normalization_245 (BatchN (None, 56, 56, 32) 128 conv2d_245[0][0]
__________________________________________________________________________________________________
re_lu_245 (ReLU) (None, 56, 56, 32) 0 batch_normalization_245[0][0]
__________________________________________________________________________________________________
concatenate_118 (Concatenate) (None, 56, 56, 128) 0 concatenate_117[0][0]
re_lu_245[0][0]
__________________________________________________________________________________________________
conv2d_246 (Conv2D) (None, 56, 56, 128) 16512 concatenate_118[0][0]
__________________________________________________________________________________________________
batch_normalization_246 (BatchN (None, 56, 56, 128) 512 conv2d_246[0][0]
__________________________________________________________________________________________________
re_lu_246 (ReLU) (None, 56, 56, 128) 0 batch_normalization_246[0][0]
__________________________________________________________________________________________________
conv2d_247 (Conv2D) (None, 56, 56, 32) 36896 re_lu_246[0][0]
__________________________________________________________________________________________________
batch_normalization_247 (BatchN (None, 56, 56, 32) 128 conv2d_247[0][0]
__________________________________________________________________________________________________
re_lu_247 (ReLU) (None, 56, 56, 32) 0 batch_normalization_247[0][0]
__________________________________________________________________________________________________
concatenate_119 (Concatenate) (None, 56, 56, 160) 0 concatenate_118[0][0]
re_lu_247[0][0]
__________________________________________________________________________________________________
conv2d_248 (Conv2D) (None, 56, 56, 128) 20608 concatenate_119[0][0]
__________________________________________________________________________________________________
batch_normalization_248 (BatchN (None, 56, 56, 128) 512 conv2d_248[0][0]
__________________________________________________________________________________________________
re_lu_248 (ReLU) (None, 56, 56, 128) 0 batch_normalization_248[0][0]
__________________________________________________________________________________________________
conv2d_249 (Conv2D) (None, 56, 56, 32) 36896 re_lu_248[0][0]
__________________________________________________________________________________________________
batch_normalization_249 (BatchN (None, 56, 56, 32) 128 conv2d_249[0][0]
__________________________________________________________________________________________________
re_lu_249 (ReLU) (None, 56, 56, 32) 0 batch_normalization_249[0][0]
__________________________________________________________________________________________________
concatenate_120 (Concatenate) (None, 56, 56, 192) 0 concatenate_119[0][0]
re_lu_249[0][0]
__________________________________________________________________________________________________
conv2d_250 (Conv2D) (None, 56, 56, 128) 24704 concatenate_120[0][0]
__________________________________________________________________________________________________
batch_normalization_250 (BatchN (None, 56, 56, 128) 512 conv2d_250[0][0]
__________________________________________________________________________________________________
re_lu_250 (ReLU) (None, 56, 56, 128) 0 batch_normalization_250[0][0]
__________________________________________________________________________________________________
conv2d_251 (Conv2D) (None, 56, 56, 32) 36896 re_lu_250[0][0]
__________________________________________________________________________________________________
batch_normalization_251 (BatchN (None, 56, 56, 32) 128 conv2d_251[0][0]
__________________________________________________________________________________________________
re_lu_251 (ReLU) (None, 56, 56, 32) 0 batch_normalization_251[0][0]
__________________________________________________________________________________________________
concatenate_121 (Concatenate) (None, 56, 56, 224) 0 concatenate_120[0][0]
re_lu_251[0][0]
__________________________________________________________________________________________________
conv2d_252 (Conv2D) (None, 56, 56, 128) 28800 concatenate_121[0][0]
__________________________________________________________________________________________________
batch_normalization_252 (BatchN (None, 56, 56, 128) 512 conv2d_252[0][0]
__________________________________________________________________________________________________
re_lu_252 (ReLU) (None, 56, 56, 128) 0 batch_normalization_252[0][0]
__________________________________________________________________________________________________
conv2d_253 (Conv2D) (None, 56, 56, 32) 36896 re_lu_252[0][0]
__________________________________________________________________________________________________
batch_normalization_253 (BatchN (None, 56, 56, 32) 128 conv2d_253[0][0]
__________________________________________________________________________________________________
re_lu_253 (ReLU) (None, 56, 56, 32) 0 batch_normalization_253[0][0]
__________________________________________________________________________________________________
concatenate_122 (Concatenate) (None, 56, 56, 256) 0 concatenate_121[0][0]
re_lu_253[0][0]
__________________________________________________________________________________________________
conv2d_254 (Conv2D) (None, 56, 56, 128) 32896 concatenate_122[0][0]
__________________________________________________________________________________________________
batch_normalization_254 (BatchN (None, 56, 56, 128) 512 conv2d_254[0][0]
__________________________________________________________________________________________________
re_lu_254 (ReLU) (None, 56, 56, 128) 0 batch_normalization_254[0][0]
REMOVED for BREVITY ...
__________________________________________________________________________________________________
average_pooling2d_9 (AveragePoo (None, 7, 7, 512) 0 re_lu_328[0][0]
__________________________________________________________________________________________________
conv2d_329 (Conv2D) (None, 7, 7, 128) 65664 average_pooling2d_9[0][0]
__________________________________________________________________________________________________
batch_normalization_329 (BatchN (None, 7, 7, 128) 512 conv2d_329[0][0]
__________________________________________________________________________________________________
re_lu_329 (ReLU) (None, 7, 7, 128) 0 batch_normalization_329[0][0]
__________________________________________________________________________________________________
conv2d_330 (Conv2D) (None, 7, 7, 32) 36896 re_lu_329[0][0]
__________________________________________________________________________________________________
batch_normalization_330 (BatchN (None, 7, 7, 32) 128 conv2d_330[0][0]
__________________________________________________________________________________________________
re_lu_330 (ReLU) (None, 7, 7, 32) 0 batch_normalization_330[0][0]
__________________________________________________________________________________________________
concatenate_159 (Concatenate) (None, 7, 7, 544) 0 average_pooling2d_9[0][0]
re_lu_330[0][0]
__________________________________________________________________________________________________
conv2d_331 (Conv2D) (None, 7, 7, 128) 69760 concatenate_159[0][0]
__________________________________________________________________________________________________
batch_normalization_331 (BatchN (None, 7, 7, 128) 512 conv2d_331[0][0]
__________________________________________________________________________________________________
re_lu_331 (ReLU) (None, 7, 7, 128) 0 batch_normalization_331[0][0]
__________________________________________________________________________________________________
conv2d_332 (Conv2D) (None, 7, 7, 32) 36896 re_lu_331[0][0]
__________________________________________________________________________________________________
batch_normalization_332 (BatchN (None, 7, 7, 32) 128 conv2d_332[0][0]
__________________________________________________________________________________________________
re_lu_332 (ReLU) (None, 7, 7, 32) 0 batch_normalization_332[0][0]
__________________________________________________________________________________________________
concatenate_160 (Concatenate) (None, 7, 7, 576) 0 concatenate_159[0][0]
re_lu_332[0][0]
__________________________________________________________________________________________________
conv2d_333 (Conv2D) (None, 7, 7, 128) 73856 concatenate_160[0][0]
__________________________________________________________________________________________________
batch_normalization_333 (BatchN (None, 7, 7, 128) 512 conv2d_333[0][0]
__________________________________________________________________________________________________
re_lu_333 (ReLU) (None, 7, 7, 128) 0 batch_normalization_333[0][0]
__________________________________________________________________________________________________
conv2d_334 (Conv2D) (None, 7, 7, 32) 36896 re_lu_333[0][0]
__________________________________________________________________________________________________
batch_normalization_334 (BatchN (None, 7, 7, 32) 128 conv2d_334[0][0]
__________________________________________________________________________________________________
re_lu_334 (ReLU) (None, 7, 7, 32) 0 batch_normalization_334[0][0]
__________________________________________________________________________________________________
concatenate_161 (Concatenate) (None, 7, 7, 608) 0 concatenate_160[0][0]
re_lu_334[0][0]
__________________________________________________________________________________________________
conv2d_335 (Conv2D) (None, 7, 7, 128) 77952 concatenate_161[0][0]
__________________________________________________________________________________________________
batch_normalization_335 (BatchN (None, 7, 7, 128) 512 conv2d_335[0][0]
__________________________________________________________________________________________________
re_lu_335 (ReLU) (None, 7, 7, 128) 0 batch_normalization_335[0][0]
__________________________________________________________________________________________________
conv2d_336 (Conv2D) (None, 7, 7, 32) 36896 re_lu_335[0][0]
__________________________________________________________________________________________________
batch_normalization_336 (BatchN (None, 7, 7, 32) 128 conv2d_336[0][0]
__________________________________________________________________________________________________
re_lu_336 (ReLU) (None, 7, 7, 32) 0 batch_normalization_336[0][0]
__________________________________________________________________________________________________
concatenate_162 (Concatenate) (None, 7, 7, 640) 0 concatenate_161[0][0]
re_lu_336[0][0]
__________________________________________________________________________________________________
conv2d_337 (Conv2D) (None, 7, 7, 128) 82048 concatenate_162[0][0]
__________________________________________________________________________________________________
batch_normalization_337 (BatchN (None, 7, 7, 128) 512 conv2d_337[0][0]
__________________________________________________________________________________________________
re_lu_337 (ReLU) (None, 7, 7, 128) 0 batch_normalization_337[0][0]
__________________________________________________________________________________________________
conv2d_338 (Conv2D) (None, 7, 7, 32) 36896 re_lu_337[0][0]
__________________________________________________________________________________________________
batch_normalization_338 (BatchN (None, 7, 7, 32) 128 conv2d_338[0][0]
__________________________________________________________________________________________________
re_lu_338 (ReLU) (None, 7, 7, 32) 0 batch_normalization_338[0][0]
__________________________________________________________________________________________________
concatenate_163 (Concatenate) (None, 7, 7, 672) 0 concatenate_162[0][0]
re_lu_338[0][0]
__________________________________________________________________________________________________
conv2d_339 (Conv2D) (None, 7, 7, 128) 86144 concatenate_163[0][0]
__________________________________________________________________________________________________
batch_normalization_339 (BatchN (None, 7, 7, 128) 512 conv2d_339[0][0]
__________________________________________________________________________________________________
re_lu_339 (ReLU) (None, 7, 7, 128) 0 batch_normalization_339[0][0]
__________________________________________________________________________________________________
conv2d_340 (Conv2D) (None, 7, 7, 32) 36896 re_lu_339[0][0]
__________________________________________________________________________________________________
batch_normalization_340 (BatchN (None, 7, 7, 32) 128 conv2d_340[0][0]
__________________________________________________________________________________________________
re_lu_340 (ReLU) (None, 7, 7, 32) 0 batch_normalization_340[0][0]
__________________________________________________________________________________________________
concatenate_164 (Concatenate) (None, 7, 7, 704) 0 concatenate_163[0][0]
re_lu_340[0][0]
__________________________________________________________________________________________________
conv2d_341 (Conv2D) (None, 7, 7, 128) 90240 concatenate_164[0][0]
__________________________________________________________________________________________________
batch_normalization_341 (BatchN (None, 7, 7, 128) 512 conv2d_341[0][0]
__________________________________________________________________________________________________
re_lu_341 (ReLU) (None, 7, 7, 128) 0 batch_normalization_341[0][0]
__________________________________________________________________________________________________
conv2d_342 (Conv2D) (None, 7, 7, 32) 36896 re_lu_341[0][0]
__________________________________________________________________________________________________
batch_normalization_342 (BatchN (None, 7, 7, 32) 128 conv2d_342[0][0]
__________________________________________________________________________________________________
re_lu_342 (ReLU) (None, 7, 7, 32) 0 batch_normalization_342[0][0]
__________________________________________________________________________________________________
concatenate_165 (Concatenate) (None, 7, 7, 736) 0 concatenate_164[0][0]
re_lu_342[0][0]
__________________________________________________________________________________________________
conv2d_343 (Conv2D) (None, 7, 7, 128) 94336 concatenate_165[0][0]
__________________________________________________________________________________________________
batch_normalization_343 (BatchN (None, 7, 7, 128) 512 conv2d_343[0][0]
__________________________________________________________________________________________________
re_lu_343 (ReLU) (None, 7, 7, 128) 0 batch_normalization_343[0][0]
__________________________________________________________________________________________________
conv2d_344 (Conv2D) (None, 7, 7, 32) 36896 re_lu_343[0][0]
__________________________________________________________________________________________________
batch_normalization_344 (BatchN (None, 7, 7, 32) 128 conv2d_344[0][0]
__________________________________________________________________________________________________
re_lu_344 (ReLU) (None, 7, 7, 32) 0 batch_normalization_344[0][0]
__________________________________________________________________________________________________
concatenate_166 (Concatenate) (None, 7, 7, 768) 0 concatenate_165[0][0]
re_lu_344[0][0]
__________________________________________________________________________________________________
conv2d_345 (Conv2D) (None, 7, 7, 128) 98432 concatenate_166[0][0]
__________________________________________________________________________________________________
batch_normalization_345 (BatchN (None, 7, 7, 128) 512 conv2d_345[0][0]
__________________________________________________________________________________________________
re_lu_345 (ReLU) (None, 7, 7, 128) 0 batch_normalization_345[0][0]
__________________________________________________________________________________________________
conv2d_346 (Conv2D) (None, 7, 7, 32) 36896 re_lu_345[0][0]
__________________________________________________________________________________________________
batch_normalization_346 (BatchN (None, 7, 7, 32) 128 conv2d_346[0][0]
__________________________________________________________________________________________________
re_lu_346 (ReLU) (None, 7, 7, 32) 0 batch_normalization_346[0][0]
__________________________________________________________________________________________________
concatenate_167 (Concatenate) (None, 7, 7, 800) 0 concatenate_166[0][0]
re_lu_346[0][0]
__________________________________________________________________________________________________
conv2d_347 (Conv2D) (None, 7, 7, 128) 102528 concatenate_167[0][0]
__________________________________________________________________________________________________
batch_normalization_347 (BatchN (None, 7, 7, 128) 512 conv2d_347[0][0]
__________________________________________________________________________________________________
re_lu_347 (ReLU) (None, 7, 7, 128) 0 batch_normalization_347[0][0]
__________________________________________________________________________________________________
conv2d_348 (Conv2D) (None, 7, 7, 32) 36896 re_lu_347[0][0]
__________________________________________________________________________________________________
batch_normalization_348 (BatchN (None, 7, 7, 32) 128 conv2d_348[0][0]
__________________________________________________________________________________________________
re_lu_348 (ReLU) (None, 7, 7, 32) 0 batch_normalization_348[0][0]
__________________________________________________________________________________________________
concatenate_168 (Concatenate) (None, 7, 7, 832) 0 concatenate_167[0][0]
re_lu_348[0][0]
__________________________________________________________________________________________________
conv2d_349 (Conv2D) (None, 7, 7, 128) 106624 concatenate_168[0][0]
__________________________________________________________________________________________________
batch_normalization_349 (BatchN (None, 7, 7, 128) 512 conv2d_349[0][0]
__________________________________________________________________________________________________
re_lu_349 (ReLU) (None, 7, 7, 128) 0 batch_normalization_349[0][0]
__________________________________________________________________________________________________
conv2d_350 (Conv2D) (None, 7, 7, 32) 36896 re_lu_349[0][0]
__________________________________________________________________________________________________
batch_normalization_350 (BatchN (None, 7, 7, 32) 128 conv2d_350[0][0]
__________________________________________________________________________________________________
re_lu_350 (ReLU) (None, 7, 7, 32) 0 batch_normalization_350[0][0]
__________________________________________________________________________________________________
concatenate_169 (Concatenate) (None, 7, 7, 864) 0 concatenate_168[0][0]
re_lu_350[0][0]
__________________________________________________________________________________________________
conv2d_351 (Conv2D) (None, 7, 7, 128) 110720 concatenate_169[0][0]
__________________________________________________________________________________________________
batch_normalization_351 (BatchN (None, 7, 7, 128) 512 conv2d_351[0][0]
__________________________________________________________________________________________________
re_lu_351 (ReLU) (None, 7, 7, 128) 0 batch_normalization_351[0][0]
__________________________________________________________________________________________________
conv2d_352 (Conv2D) (None, 7, 7, 32) 36896 re_lu_351[0][0]
__________________________________________________________________________________________________
batch_normalization_352 (BatchN (None, 7, 7, 32) 128 conv2d_352[0][0]
__________________________________________________________________________________________________
re_lu_352 (ReLU) (None, 7, 7, 32) 0 batch_normalization_352[0][0]
__________________________________________________________________________________________________
concatenate_170 (Concatenate) (None, 7, 7, 896) 0 concatenate_169[0][0]
re_lu_352[0][0]
__________________________________________________________________________________________________
conv2d_353 (Conv2D) (None, 7, 7, 128) 114816 concatenate_170[0][0]
__________________________________________________________________________________________________
batch_normalization_353 (BatchN (None, 7, 7, 128) 512 conv2d_353[0][0]
__________________________________________________________________________________________________
re_lu_353 (ReLU) (None, 7, 7, 128) 0 batch_normalization_353[0][0]
__________________________________________________________________________________________________
conv2d_354 (Conv2D) (None, 7, 7, 32) 36896 re_lu_353[0][0]
__________________________________________________________________________________________________
batch_normalization_354 (BatchN (None, 7, 7, 32) 128 conv2d_354[0][0]
__________________________________________________________________________________________________
re_lu_354 (ReLU) (None, 7, 7, 32) 0 batch_normalization_354[0][0]
__________________________________________________________________________________________________
concatenate_171 (Concatenate) (None, 7, 7, 928) 0 concatenate_170[0][0]
re_lu_354[0][0]
__________________________________________________________________________________________________
conv2d_355 (Conv2D) (None, 7, 7, 128) 118912 concatenate_171[0][0]
__________________________________________________________________________________________________
batch_normalization_355 (BatchN (None, 7, 7, 128) 512 conv2d_355[0][0]
__________________________________________________________________________________________________
re_lu_355 (ReLU) (None, 7, 7, 128) 0 batch_normalization_355[0][0]
__________________________________________________________________________________________________
conv2d_356 (Conv2D) (None, 7, 7, 32) 36896 re_lu_355[0][0]
__________________________________________________________________________________________________
batch_normalization_356 (BatchN (None, 7, 7, 32) 128 conv2d_356[0][0]
__________________________________________________________________________________________________
re_lu_356 (ReLU) (None, 7, 7, 32) 0 batch_normalization_356[0][0]
__________________________________________________________________________________________________
concatenate_172 (Concatenate) (None, 7, 7, 960) 0 concatenate_171[0][0]
re_lu_356[0][0]
__________________________________________________________________________________________________
conv2d_357 (Conv2D) (None, 7, 7, 128) 123008 concatenate_172[0][0]
__________________________________________________________________________________________________
batch_normalization_357 (BatchN (None, 7, 7, 128) 512 conv2d_357[0][0]
__________________________________________________________________________________________________
re_lu_357 (ReLU) (None, 7, 7, 128) 0 batch_normalization_357[0][0]
__________________________________________________________________________________________________
conv2d_358 (Conv2D) (None, 7, 7, 32) 36896 re_lu_357[0][0]
__________________________________________________________________________________________________
batch_normalization_358 (BatchN (None, 7, 7, 32) 128 conv2d_358[0][0]
__________________________________________________________________________________________________
re_lu_358 (ReLU) (None, 7, 7, 32) 0 batch_normalization_358[0][0]
__________________________________________________________________________________________________
concatenate_173 (Concatenate) (None, 7, 7, 992) 0 concatenate_172[0][0]
re_lu_358[0][0]
__________________________________________________________________________________________________
conv2d_359 (Conv2D) (None, 7, 7, 128) 127104 concatenate_173[0][0]
__________________________________________________________________________________________________
batch_normalization_359 (BatchN (None, 7, 7, 128) 512 conv2d_359[0][0]
__________________________________________________________________________________________________
re_lu_359 (ReLU) (None, 7, 7, 128) 0 batch_normalization_359[0][0]
__________________________________________________________________________________________________
conv2d_360 (Conv2D) (None, 7, 7, 32) 36896 re_lu_359[0][0]
__________________________________________________________________________________________________
batch_normalization_360 (BatchN (None, 7, 7, 32) 128 conv2d_360[0][0]
__________________________________________________________________________________________________
re_lu_360 (ReLU) (None, 7, 7, 32) 0 batch_normalization_360[0][0]
__________________________________________________________________________________________________
concatenate_174 (Concatenate) (None, 7, 7, 1024) 0 concatenate_173[0][0]
re_lu_360[0][0]
__________________________________________________________________________________________________
global_average_pooling2d_3 (Glo (None, 1024) 0 concatenate_174[0][0]
__________________________________________________________________________________________________
dense_3 (Dense) (None, 1000) 1025000 global_average_pooling2d_3[0][0]
==================================================================================================
Total params: 7,946,408
Trainable params: 7,925,928
Non-trainable params: 20,480
__________________________________________________________________________________________________
In [ ]:
model.summary()
Let's layout a CNN using the Xception architecture pattern.
We will use these approaches:
1. Decompose into a stem, entrance, middle and exit module.
2. Stem does the initial sequential convolutional layers for the input.
3. Entrance does the coarse filter learning.
4. Middle does the detail filter learning.
5. Exit does the classification.
We won't build a full Xception, just a mini-example to practice the layout.
You will need to:
1. Use a strided convolution in the stem group.
2. Set the number of residual blocks in the residual groups in the middle flow.
3. Use global averaging in the classifier.
4. Set the input to the project link in the residual blocks in the entry flow.
5. Remember the input to the residual blocks in the middle flow.
In [ ]:
from keras import layers, Input, Model
def entryFlow(inputs):
""" Create the entry flow section
inputs : input tensor to neural network
"""
def stem(inputs):
""" Create the stem entry into the neural network
inputs : input tensor to neural network
"""
# The stem uses two 3x3 convolutions.
# The first one downsamples and the second one doubles the number of filters
# First convolution
x = layers.Conv2D(32, (3, 3), strides=(2, 2))(inputs)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Second convolution, double the number of filters (no downsampling)
# HINT: when stride > 1 you are downsampling (also known as strided convolution)
x = layers.Conv2D(??, (3, 3), strides=??)(inputs)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
return x
# Create the stem to the neural network
x = stem(inputs)
# Create three residual blocks
for nb_filters in [128, 256, 728]:
x = residual_block_entry(x, nb_filters)
return x
def middleFlow(x):
""" Create the middle flow section
x : input tensor into section
"""
# Create 8 residual blocks, each with 728 filters
for _ in range(8):
x = residual_block_middle(x, ??)
return x
def exitFlow(x):
""" Create the exit flow section
x : input tensor into section
"""
def classifier(x):
""" The output classifier
x : input tensor
"""
# Global Average Pooling will flatten the 10x10 feature maps into 1D
# feature maps
x = layers.??()(x)
# Fully connected output layer (classification)
x = layers.Dense(1000, activation='softmax')(x)
return x
shortcut = x
# First Depthwise Separable Convolution
x = layers.SeparableConv2D(728, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
# Second Depthwise Separable Convolution
x = layers.SeparableConv2D(1024, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Create pooled feature maps, reduce size by 75%
x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
# Add strided convolution to identity link to double number of filters to
# match output of residual block for the add operation
shortcut = layers.Conv2D(1024, (1, 1), strides=(2, 2),
padding='same')(shortcut)
shortcut = layers.BatchNormalization()(shortcut)
x = layers.add([x, shortcut])
# Third Depthwise Separable Convolution
x = layers.SeparableConv2D(1556, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Fourth Depthwise Separable Convolution
x = layers.SeparableConv2D(2048, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Create classifier section
x = classifier(x)
return x
def residual_block_entry(x, nb_filters):
""" Create a residual block using Depthwise Separable Convolutions
x : input into residual block
nb_filters: number of filters
"""
shortcut = x
# First Depthwise Separable Convolution
x = layers.SeparableConv2D(nb_filters, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Second depthwise Separable Convolution
x = layers.SeparableConv2D(nb_filters, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Create pooled feature maps, reduce size by 75%
x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
# Add strided convolution to identity link to double number of filters to
# match output of residual block for the add operation
# HINT: this is the identity branch, so what should be the input?
shortcut = layers.Conv2D(nb_filters, (1, 1), strides=(2, 2),
padding='same')(??)
shortcut = layers.BatchNormalization()(shortcut)
x = layers.add([x, shortcut])
return x
def residual_block_middle(x, nb_filters):
""" Create a residual block using Depthwise Separable Convolutions
x : input into residual block
nb_filters: number of filters
"""
# Remember to save the input for the identity link
# HINT: it's in the params!
shortcut = ??
# First Depthwise Separable Convolution
x = layers.SeparableConv2D(nb_filters, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Second depthwise Separable Convolution
x = layers.SeparableConv2D(nb_filters, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
# Third depthwise Separable Convolution
x = layers.SeparableConv2D(nb_filters, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
x = layers.add([x, shortcut])
return x
inputs = Input(shape=(299, 299, 3))
# Create entry section
x = entryFlow(inputs)
# Create the middle section
x = middleFlow(x)
# Create the exit section
outputs = exitFlow(x)
model = Model(inputs, outputs)
It should look (end) like below:
global_average_pooling2d_1 (Glo (None, 2048) 0 re_lu_37[0][0]
__________________________________________________________________________________________________
dense_1 (Dense) (None, 1000) 2049000 global_average_pooling2d_1[0][0]
==================================================================================================
Total params: 22,981,736
Trainable params: 22,927,232
Non-trainable params: 54,504
In [ ]:
model.summary()